Course 3 Occupancy models
Preparations
Load libraries
library(unmarked) #main package
library(terra) #needed for prediction maps
Set workspace, data directories
### The workspace (repetition)
getwd() # you can also use the package 'here()'
## [1] "C:/Users/sollmann/Documents/D6/Kurs Steph 2026/d6_teaching_collection-main/d6_teaching_collection-main/R/Course3_b_presence_absence_occupancy"
root_wd <- here::here()
# relative to work-wd
##detection data
data_wd <- paste(root_wd,"/","data/data_berlin/animal_data",sep='')
## covariate maps
maps_wd <- paste(root_wd,"/","data/data_berlin/geo_raster_current_gtif",sep='')
Load, explore the detection data
## read in detection data - example: raccoon
y<-readRDS(paste0(data_wd, '/raccoon_spring2019.RDS'))
## Basic data summaries:
##how many locations sampled?
J=nrow(y)
##how many repeat samples?
K=ncol(y)
##naive occupancy (number of sites with detections)
sum(apply(y,1,sum)>0)/nrow(y)
## [1] 0.4615385
##number of detections in each sampling occasion
apply(y,2,sum)
## occ1 occ2 occ3 occ4
## 35 34 36 25
Load, explore covariate data
## read in covariate data
covs<-readRDS(paste0(data_wd, '/covenv_spring2019.RDS'))
## look at covariates
head(covs)
## User_uid Local_tree_cover pop_100 dist_water_100 imperv_100 noise_100
## 2 381 40 102.7272034 647.3228 81.60653 52.22244
## 3 382 5 43.6494637 1107.4154 65.60398 61.72527
## 4 383 20 139.3569641 519.9907 70.26485 49.98725
## 5 389 33 39.5562325 125.3740 51.60028 58.01681
## 6 390 20 111.5275955 134.6375 49.80275 56.28348
## 7 391 1 0.1371695 168.7975 23.21507 35.46156
## tree_cover_100 lat_round lon_round
## 2 4.307139 52.61 13.43
## 3 29.130781 52.54 13.13
## 4 31.433018 52.61 13.42
## 5 39.235077 52.58 13.42
## 6 21.789064 52.58 13.42
## 7 26.736300 52.56 13.16
## we will create an observation covariate (varies across visits)
## to test whether detection probability increases or decreases
## over time (maybe animals are afraid of camera traps = novel
## objects initially, but gradually get used to them)
time<-matrix((0:3), nrow=J, ncol=K, byrow=T)
head(time)
## [,1] [,2] [,3] [,4]
## [1,] 0 1 2 3
## [2,] 0 1 2 3
## [3,] 0 1 2 3
## [4,] 0 1 2 3
## [5,] 0 1 2 3
## [6,] 0 1 2 3
##convert to dataframe
time.df<-data.frame(time)
colnames(time.df)<-paste0('time.', 1:4)
Occupancy modeling
Run simplest occpancy model - no covariates
##start with null model (no covariates)
m0<-occu(~1~1, data=umf)
summary(m0)
##
## Call:
## occu(formula = ~1 ~ 1, data = umf)
##
## Occupancy (logit-scale):
## Estimate SE z P(>|z|)
## 0.0381 0.192 0.198 0.843
##
## Detection (logit-scale):
## Estimate SE z P(>|z|)
## -0.217 0.144 -1.5 0.134
##
## AIC: 556.5455
## Number of sites: 143
##manual backtransformation - occupancy
exp(0.0381)/(1+exp(0.0381))
## [1] 0.5095238
## [1] 0.5095238
##compare to naive occu
## detection
plogis(-0.217)
## [1] 0.4459619
## better: use predict() function
head(predict(m0, type='state')) # state = occupancy
## Predicted SE lower upper
## 1 0.5095135 0.04800408 0.4161927 0.602176
## 2 0.5095135 0.04800408 0.4161927 0.602176
## 3 0.5095135 0.04800408 0.4161927 0.602176
## 4 0.5095135 0.04800408 0.4161927 0.602176
## 5 0.5095135 0.04800408 0.4161927 0.602176
## 6 0.5095135 0.04800408 0.4161927 0.602176
head(predict(m0, type='det')) # det = detection
## Predicted SE lower upper
## 1 0.4460575 0.03570078 0.3775881 0.5166368
## 2 0.4460575 0.03570078 0.3775881 0.5166368
## 3 0.4460575 0.03570078 0.3775881 0.5166368
## 4 0.4460575 0.03570078 0.3775881 0.5166368
## 5 0.4460575 0.03570078 0.3775881 0.5166368
## 6 0.4460575 0.03570078 0.3775881 0.5166368
##produces a value for each sampling location (and occasion)
##more useful for models with covariates
Run a model with an occupancy covariate
## run model with tree cover 100 on occupancy
m.tree100<-occu(~1~tree_cover_100, data=umf)
summary(m.tree100)
##
## Call:
## occu(formula = ~1 ~ tree_cover_100, data = umf)
##
## Occupancy (logit-scale):
## Estimate SE z P(>|z|)
## (Intercept) -0.8450 0.4385 -1.93 0.0540
## tree_cover_100 0.0258 0.0118 2.18 0.0291
##
## Detection (logit-scale):
## Estimate SE z P(>|z|)
## -0.219 0.145 -1.51 0.131
##
## AIC: 553.4082
## Number of sites: 143
## now, predict will calculate occu probability for each location
## based on its tree cover value
head(predict(m.tree100, type='state')) # state = occupancy
## Predicted SE lower upper
## 1 0.3243848 0.08628395 0.1816305 0.5094871
## 2 0.4769126 0.05066007 0.3797919 0.5758111
## 3 0.4917655 0.04963342 0.3959987 0.5881405
## 4 0.5420535 0.05229126 0.4392358 0.6414100
## 5 0.4299448 0.05846560 0.3209048 0.5462334
## 6 0.4615081 0.05253579 0.3615596 0.5646500
##we can use this to plot occupancy probability as a function
## of tree cover
pred.tree<-predict(m.tree100, type='state')
plot(covs$tree_cover_100, pred.tree$Predicted)

##IMPORTANT: this simple approach only works for models with
## only one covariate
Run a model with multiple covariates on occupancy
## let's look at an example with two covariates:
## tree cover at the local and the 100-m scale
m.tree2<-occu(~1~tree_cover_100+Local_tree_cover, data=umf)
pred.tree2<-predict(m.tree2, type='state')
plot(covs$tree_cover_100, pred.tree2$Predicted)

##this happens because predict considers both variables when
## calculating the occupancy probability at each location
## but we can only plot against one covariate
## using predict when there is more than one covariate
## create a data frame with new predictor values
## create a range of values for the predictor you want to look at
## keep a fixed value (0 or the mean) for the other predictor(s)
## NOTE: ALL predictors that are in the model must be in the new
## data frame; the column names must be identical to those
## in the data
new.cov<-data.frame(tree_cover_100 = seq(0,100,1),#0-100
Local_tree_cover = mean(covs$Local_tree_cover))
pred.tree3<-predict(m.tree2,newdata=new.cov, type='state')
plot(new.cov$tree_cover_100, pred.tree3$Predicted)

Run a model with a detection covariate
## now let's check for an effect of time on detection
m.time<-occu(~time~1, data=umf)
summary(m.time)
##
## Call:
## occu(formula = ~time ~ 1, data = umf)
##
## Occupancy (logit-scale):
## Estimate SE z P(>|z|)
## 0.0342 0.192 0.179 0.858
##
## Detection (logit-scale):
## Estimate SE z P(>|z|)
## (Intercept) 0.0198 0.217 0.0913 0.927
## time -0.1564 0.106 -1.4714 0.141
##
## AIC: 556.3619
## Number of sites: 143
pred.time<-predict(m.time, type='det')
plot(as.vector(t(time)), pred.time$Predicted)

## or using new data
new.timecov<-data.frame(time=0:3)
pred.time2<-predict(m.time,newdata=new.timecov, type='det')
plot(new.timecov$time, pred.time2$Predicted)

Model selection via AIC
##fit multiple models: here with and without time,
## with and without tree cover
## (top three are repeats from above)
m0<-occu(~1~1, data=umf)
m.tree100<-occu(~1~tree_cover_100, data=umf)
m.time<-occu(~time~1, data=umf)
m.tree.time<-occu(~time~tree_cover_100, data=umf)
##which of these is the best model?
##create a named fitlist
fl<-fitList(null=m0,
tree=m.tree100,
time=m.time,
treeTime=m.tree.time)
##perform AIC based model selection
modSel(fl)
## nPars AIC delta AICwt cumltvWt
## treeTime 4 553.23 0.00 0.433 0.43
## tree 3 553.41 0.18 0.395 0.83
## time 3 556.36 3.14 0.090 0.92
## null 2 556.55 3.32 0.082 1.00
## treeTime and tree are very similar
## and both are better than the two models without tree cover
Prediction maps
Read in treecover raster
## Read in raster of tree cover at 100-m scale
tree.berlin<-rast(paste0(maps_wd,'/tree_cover_density_2012_100m_3035.tif'))
## Look at raster
plot(tree.berlin)

Create prediction raster (map)
## create duplicate of treecover data (this maintains resolution, extent etc)
raccoon.occu<-tree.berlin
## replace treecover values with predictions of raccoon occupancy
## again, only for non-NA values
values(raccoon.occu)[!is.na(values(tree.berlin))]<-pred.berlin$Predicted
## plot occupancy raster
plot(raccoon.occu)

Session Info
## [1] "2026-02-02 11:43:55 CET"
#git2r::repository() ## uncomment if you are using GitHub
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19044)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=German_Germany.utf8 LC_CTYPE=German_Germany.utf8
## [3] LC_MONETARY=German_Germany.utf8 LC_NUMERIC=C
## [5] LC_TIME=C
##
## time zone: Europe/Berlin
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] terra_1.8-70 unmarked_1.5.1
##
## loaded via a namespace (and not attached):
## [1] cli_3.6.5 knitr_1.50 rlang_1.1.6 xfun_0.54
## [5] reformulas_0.4.1 textshaping_1.0.3 jsonlite_2.0.0 rprojroot_2.1.1
## [9] htmltools_0.5.9 ragg_1.5.0 sass_0.4.10 rmarkdown_2.30
## [13] grid_4.5.1 evaluate_1.0.5 jquerylib_0.1.4 MASS_7.3-65
## [17] rmdformats_1.0.4 fastmap_1.2.0 yaml_2.3.11 lifecycle_1.0.4
## [21] bookdown_0.46 compiler_4.5.1 codetools_0.2-20 Rcpp_1.1.0
## [25] here_1.0.2 rstudioapi_0.17.1 systemfonts_1.3.0 lattice_0.22-7
## [29] digest_0.6.39 R6_2.6.1 Rdpack_2.6.4 parallel_4.5.1
## [33] rbibutils_2.3 Matrix_1.7-3 bslib_0.9.0 tools_4.5.1
## [37] cachem_1.1.0